


*Start from an empty session and switch off output paging so the script runs without pausing
clear all
set more off

***************************************************************************************************
**********************************Do Main Analysis************************************************
***************************************************************************************************
*Inputs: the main analysis dataset (1) plus three files merged onto it by locationcode (2-4)
*1. "main_analysis_data.dta"
*2."school_stability_rate.dta"
*3. "school_VA.dta"
*4. "class_size_teacher_data.dta"

*Outputs (in order):
*1. Table 3
*2. Figure 3 (and Figure B.8(b))
*3. Table 4
*4. Table 5
*5. Table B.3
*6. Figure B.5
*7. Figure B.4
*8. Figure B.3
*9. Table 6
*10. Table 7
*11. Table B.4
*12. Figure B.6
***************************************************************************************************
***************************************************************************************************
***************************************************************************************************

*Load the main analysis file
use "/data_analysis/Eliso_Complete/Data/main_analysis_data.dta"

*****RD variables
*running_var re-centers distance_calc at the 5.05 cutoff; observations at or below the cutoff
*are flagged as treated, and inter allows a separate slope on the treated side
generate running_var = distance_calc - 5.05
generate treatment   = (running_var <= 0)
generate inter       = treatment * running_var

***************************************************************************************************
*Demographic controls: ell code, gender, ethnicity, age interacted with grade, frl, class and home language
*The three string codes are encoded as labelled numeric variables and the strings are then dropped
encode langclscode,   generate(class_lang)
encode homelangdescr, generate(home_lang)
encode ellevelcode,   generate(el_code)
drop langclscode homelangdescr ellevelcode
generate age2 = age*age
*Missing ethnicity and parental education are given their own category codes (10 and 7)
replace ethnicity = 10 if ethnicity==.
replace pared     = 7  if pared==.

*Indicator for not attending assigned school (e.g., not attending attendance zone school)
generate open_enroll = (locationcode != schreslocationcode)

*Merge on school stability rate, school VA, and class size / teacher experience.
*keep(master match) discards using-only rows and nogenerate suppresses _merge, which is the
*same as merging and then running "drop if _merge==2" and "drop _merge"
merge m:1 locationcode year using "/data_analysis/Eliso_Complete/Data/school_stability_rate.dta", keep(master match) nogenerate
merge m:1 locationcode using  "/data_analysis/Eliso_Complete/Data/school_VA.dta", keep(master match) nogenerate
merge m:1 locationcode year using "/data_analysis/Eliso_Complete/class_size_teacher_data.dta", keep(master match) nogenerate

*Flag missing class size, then fill the gaps with the sample mean of class_size
generate missing_class_size = (class_size==.)
summarize class_size
replace class_size = r(mean) if class_size==.




***********************************************************************************************
***********************************************************************************************
**************************Table 3**************************************************************
***********************************************************************************************
***********************************************************************************************

*Test Scores
*Table 3: raw comparisons within one mile on either side of the cutoff. For every variable the
*loops show the treated-side mean, the control-side mean, their difference, and a regression of
*the variable on treatment over the same window.
*treatment is defined as running_var<=0, so the control side must use the strict running_var>0.
*The earlier ">=0" put observations exactly at the cutoff in BOTH groups, so the displayed
*difference could disagree with the regression coefficient.
local t3_treated "running_var>=-1 & running_var<=0"
local t3_control "running_var>0 & running_var<=1"
local t3_window  "running_var>=-1 & running_var<=1"

preserve
keep if year==2015 | year==2016
gen math_growth = math_scorez - lag_math
gen ela_growth = ela_scorez - lag_ela
foreach var of varlist math_growth ela_growth lag_math lag_ela {
    su `var' if `t3_treated' & year==2016  & grade<=5
    local treat_mean=r(mean)
    su `var' if `t3_control' & year==2016  & grade<=5
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window' & year==2016  & grade<=5
}
restore
*Demographics
*Ethnicity
preserve
keep if year==2015 | year==2016
gen hispanic=(ethnicity==5)
gen white=(ethnicity==8)
gen black=(ethnicity==3)
gen asian=(ethnicity==2)
foreach var of varlist hispanic white black asian {
    *ethnicity code 10 marks missing ethnicity (recoded from . earlier in the file)
    replace `var'=. if ethnicity==10
    *restrict to students with both a current and a lagged math score
    replace `var'=. if math_scorez==. | lag_math==.
    su `var' if `t3_treated' & year==2016  & grade<=5
    local treat_mean=r(mean)
    su `var' if `t3_control' & year==2016  & grade<=5
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window' & year==2016  & grade<=5
}
*EL/FRL
gen fl=(frl==3)
*NOTE(review): a missing el_code evaluates as !=7 and is therefore counted as EL here, whereas
*the Table 6 section leaves el missing in that case - confirm which treatment is intended
gen el=(el_code!=7)
foreach var of varlist fl el {
    replace `var'=. if math_scorez==. | lag_math==.
    su `var' if `t3_treated' & year==2016  & grade<=5
    local treat_mean=r(mean)
    su `var' if `t3_control' & year==2016  & grade<=5
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window' & year==2016  & grade<=5
}
*Parental Education
gen dropout=(pared==5)
gen HS=(pared==4 | pared==6)
gen college=(pared==1 | pared==3)
foreach var of varlist dropout HS college {
    *pared code 7 marks missing parental education (recoded from . earlier in the file)
    replace `var'=. if pared==7
    replace `var'=. if math_scorez==. | lag_math==.
    su `var' if `t3_treated' & year==2016  & grade<=5
    local treat_mean=r(mean)
    su `var' if `t3_control' & year==2016  & grade<=5
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window' & year==2016  & grade<=5
}
restore

*Stability rate, VA (measured in pre-period of 2014-15)
*Collapsed to one row per running_var/locationcode/treatment cell before comparing
foreach var of varlist  stability_rate school_VA {
    preserve
    keep if year==2015 & grade<=5
    collapse `var', by(running_var locationcode treatment)
    su `var' if `t3_treated'
    local treat_mean=r(mean)
    su `var' if `t3_control'
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window'
    restore
}
*Class size and teacher chars
foreach var of varlist class_size below5 above10 {
    preserve
    keep if year==2016 & grade<=5
    collapse `var', by(running_var locationcode treatment)
    su `var' if `t3_treated'
    local treat_mean=r(mean)
    su `var' if `t3_control'
    local control_mean=r(mean)
    di `treat_mean'-`control_mean'
    reg `var' treatment if `t3_window'
    restore
}
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************


*Lat controls
*Squares and cross-product of latitude/longitude, used in `lat_lon_controls' below
gen x2=latitude*latitude
gen y2=longitude*longitude
gen xy=latitude*longitude

*Valid test scores are those with non-missing current and lagged scores
*(must be computed BEFORE the lagged scores are zero-filled just below)
gen valid_math=(math_scorez!=. & lag_math!=.)
gen valid_ela=(ela_scorez!=. & lag_ela!=.)
*Deal with other-subject missing scores with a missing indicator:
*the lag is set to 0 and a companion flag records that it was imputed
gen ela_missing=(lag_ela==.)
replace lag_ela=0 if lag_ela==.
gen math_missing=(lag_math==.)
replace lag_math=0 if lag_math==.

*Create an indicator for having switched schools from prior year and distance to that switch (for referee comment)
tsset stdpseudoid year
gen moved_school=(locationcode!=L.locationcode)
*A relational expression is never missing: when there is no prior-year record, L.locationcode is
*missing and the comparison above evaluates to 1. The earlier "if moved_school==." therefore
*never fired. Test the lag directly so that category 2 (no prior-year school) is actually assigned.
replace moved_school=2 if L.locationcode==.
gen lagged_lat=L.latitude 
gen lagged_lon=L.longitude
geodist latitude longitude lagged_lat lagged_lon, gen(dist_move)
*No prior-year coordinates => distance of the move is set to 0
replace dist_move=0 if dist_move==.
gen within_year_move=(schlocationcode!=schcode_spring)

*Control vectors
local demo_controls "i.el_code i.gender i.ethnicity i.frl i.class_lang i.home_lang i.pared open_enroll"
*School controls: (affiliated) charter, magnet
local school_controls "i.charter i.magnet missing_class_size class_size below5"
*Lat-lon space controls
local lat_lon_controls "latitude longitude x2 y2 xy"
*Grade-specific cubic in both lagged scores, plus the other-subject missing flag interacted with grade and own lag
local test_controls_math "i.grade#c.lag_math i.grade#c.lag_math#c.lag_math i.grade#c.lag_math#c.lag_math#c.lag_math i.grade#c.lag_ela i.grade#c.lag_ela#c.lag_ela i.grade#c.lag_ela#c.lag_ela#c.lag_ela i.grade i.ela_missing#i.grade#c.lag_math"
local test_controls_ela "i.grade#c.lag_math i.grade#c.lag_math#c.lag_math i.grade#c.lag_math#c.lag_math#c.lag_math i.grade#c.lag_ela i.grade#c.lag_ela#c.lag_ela i.grade#c.lag_ela#c.lag_ela#c.lag_ela i.grade i.math_missing#i.grade#c.lag_ela"
***************************************************************************************************

*Sample restrictions
drop if year>=2017
qui compress
*Blank out grade 2 math scores for years after 2013 (no grade 2 tests those years)
*NOTE(review): the original comment said "from 2013 onward" but the condition is year>2013 - confirm
replace math_scorez=. if grade==2 & year>2013

***********************************************************************************************
**************************Figure 3*************************************************************
***********************************************************************************************
******REDUCED FORM FIGURES*****
*Figure 3, math panel (this comment originally called it "Figure 2a"): test score gain by distance -- need small bins to get separate dot for those schools right at 3.37 miles. Also will get # of obs to "weight" the circles
*Approach: (1) save a student-level copy with "2"-suffixed names for the fitted lines,
*(2) collapse to one row per distance_calc for the scatter, (3) append the student-level copy back
*so both layers can be drawn in a single twoway call. dum==1 marks collapsed rows; appended rows have dum==.
preserve
keep if year==2016 & distance_calc<=6.80 & distance_calc>=3.30 & valid_math==1 & grade<=5
gen math_gain=math_scorez-lag_math
*temp save (will bring back)
keep math_gain distance_calc locationcode
ren math_gain math_gain2
ren distance_calc distance_calc2
ren locationcode locationcode2
save "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta", replace
restore
preserve
keep if year==2016 & distance_calc<=6.80 & distance_calc>=3.30 & valid_math==1 & grade<=5
gen math_gain=math_scorez-lag_math
gen count=1
*one row per distance value: count = number of students, math_gain = mean gain
collapse (count) count (mean) math_gain, by(distance_calc) 
gen dum=1
append using "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta"
*the temporary file is no longer needed once appended
erase "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta"

*Linear fits with CIs (clustered on school) on each side of 5.05, plus count-weighted scatter.
*The bin with count==132 on the treated side is drawn hollow.
*NOTE(review): 132 is a hard-coded bin size used to single out one point; it will silently stop matching if the sample changes
twoway (lfitci math_gain2 distance_calc2 if distance_calc2<=5.05 & dum==., ciplot(rline) blpattern(dash) blcolor(gs6) lcolor(forest_green) range(3.25 5.05) estopts(cluster(locationcode2))) (lfitci math_gain2 distance_calc2 if distance_calc2>=5.05 & dum==., ciplot(rline) blpattern(dash) blcolor(gs6) lcolor(maroon) range(5.05 6.75) estopts(cluster(locationcode2))) (scatter math_gain distance_calc [w=count] if distance_calc<5.05 & dum==1 & count!=132, mcolor(forest_green) msize(small)) (scatter math_gain distance_calc [w=count] if distance_calc<5.05 & dum==1 & count==132, msymbol(circle_hollow) mcolor(forest_green) msize(small)) (scatter math_gain distance_calc [w=count] if distance_calc>=5.05 & dum==1, mcolor(maroon) msize(small) ), xline(5.05) xlabel (3.25(0.5)6.75) ytitle("Math Score Growth ({&sigma})") ylabel(-.3 "-0.30" -.2 "-0.20" -.1 "-0.10" 0 "0" 0.1 "0.10" 0.2 "0.20" 0.3 "0.30" 0.4 "0.40") xtitle("Distance from Gas Leak (miles)") legend(off) graphregion(color(white)) bgcolor(white)
restore

*ELA figure (Figure 3, English panel; this comment originally called it "Figure 2(b)")
*Same three steps as the math panel, using ela_scorez / lag_ela / valid_ela
preserve
keep if year==2016 & distance_calc<=6.80 & distance_calc>=3.30 & valid_ela==1 & grade<=5
gen ela_gain=ela_scorez-lag_ela
*temp save (will bring back)
keep ela_gain distance_calc locationcode
ren ela_gain ela_gain2
ren distance_calc distance_calc2
ren locationcode locationcode2
save "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta", replace
restore
preserve
keep if year==2016 & distance_calc<=6.80 & distance_calc>=3.30 & valid_ela==1 & grade<=5
gen ela_gain=ela_scorez-lag_ela
gen count=1
collapse (count) count (mean) ela_gain, by(distance_calc) 
gen dum=1
append using "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta"
erase "/data_analysis/Eliso_Complete/Data/temp_Lfit.dta"

*Here the hollow marker is the treated-side bin with count==129 (hard-coded, see note on the math panel)
twoway (lfitci ela_gain2 distance_calc2 if distance_calc2<=5.05 & dum==., ciplot(rline) blpattern(dash) blcolor(gs6) lcolor(forest_green) range(3.25 5.05) estopts(cluster(locationcode2))) (lfitci ela_gain2 distance_calc2 if distance_calc2>=5.05 & dum==., ciplot(rline) blpattern(dash) blcolor(gs6) lcolor(maroon) range(5.05 6.75) estopts(cluster(locationcode2))) (scatter ela_gain distance_calc [w=count] if distance_calc<5.05 & dum==1 & count!=129, mcolor(forest_green) msize(small)) (scatter ela_gain distance_calc [w=count] if distance_calc<5.05 & dum==1 & count==129, msymbol(circle_hollow) mcolor(forest_green) msize(small)) (scatter ela_gain distance_calc [w=count] if distance_calc>=5.05 & dum==1, mcolor(maroon) msize(small) ), xline(5.05) xlabel (3.25(0.5)6.75) ytitle("English Score Growth ({&sigma})") ylabel(-.3 "-0.30" -.2 "-0.20" -.1 "-0.10" 0 "0" 0.1 "0.10" 0.2 "0.20" 0.3 "0.30") xtitle("Distance from Gas Leak (miles)") legend(off) graphregion(color(white)) bgcolor(white)
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

**************************Figure B.8(b)*********************************************************
*Binned scatter of pct_living_5 against school distance with separate clustered linear fits on
*each side of the 5.05 cutoff (2016, grades <=5, valid math scores). Axis title typo "Withing" corrected.
cmogram pct_living_5 distance_calc if distance_calc>=3.30 & distance_calc<=6.80 & year==2016 & grade<=5 & valid_math==1, histopts(width(0.0005)) cutpoint(5.05) scatter graphopts(ytitle("Percent Residing Within 5 Miles of Leak") xline(5.05)) graphopts(xtitle("Distance of School from Gas Leak (miles)")) graphopts(xlabel(#8)) graphopts(ylabel(#10)) lfitopts(lcolor(maroon) alcolor(gs10) alp(dash) est(vce (cluster locationcode))) lfitopts(lcolor(green) alcolor(gs10) alp(dash) est(vce (cluster locationcode)))
************************************************************************************************

***********************************************************************************************
**************************Table 4**************************************************************
***********************************************************************************************
*Table 4: comparisons of treated vs. control without running-variable terms.
*Math first, then English; within each subject the regressions run in column order and each is
*followed by a wild cluster bootstrap test of the treatment coefficient.
foreach subj in math ela {
    *Each subject controls for the flag marking an imputed lag in the OTHER subject
    if "`subj'"=="math" {
        local other_flag "ela_missing"
    }
    else {
        local other_flag "math_missing"
    }
    local rhs_base "treatment i.grade#c.lag_math  i.grade#c.lag_ela i.`other_flag' i.grade  `school_controls'"
    local rhs_full "`rhs_base' `demo_controls' i.moved_school dist_move i.within_year_move"
    local sample "year==2016 & valid_`subj'==1"
    local boot "boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph"

    preserve
    drop if `subj'_scorez==.

    *Column (1): school controls only, 1-mile window, grades 4-5
    reg `subj'_scorez `rhs_base' if inrange(running_var,-1,1) & inrange(grade,4,5) & `sample', cluster(locationcode)
    boottest treatment, `boot'

    *Column (2): adds demographic and mobility controls and absorbs stdzipcode
    areg `subj'_scorez `rhs_full' if inrange(running_var,-1,1) & inrange(grade,4,5) & `sample', cluster(locationcode) absorb(stdzipcode)
    boottest treatment, `boot'

    *Column (3): as (2) but excluding locationcode 2117
    areg `subj'_scorez `rhs_full' if inrange(running_var,-1,1) & inrange(grade,4,5) & `sample' & locationcode!=2117, cluster(locationcode) absorb(stdzipcode)
    boottest treatment, `boot'

    *Column (4): as (2) with the window widened to 1.75 miles
    areg `subj'_scorez `rhs_full' if inrange(running_var,-1.75,1.75) & inrange(grade,4,5) & `sample', cluster(locationcode) absorb(stdzipcode)
    boottest treatment, `boot'

    *Column (5): as (2) with grades 4-8
    areg `subj'_scorez `rhs_full' if inrange(running_var,-1,1) & inrange(grade,4,8) & `sample', cluster(locationcode) absorb(stdzipcode)
    boottest treatment, `boot'

    restore
}
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************




***********************************************************************************************
**************************Table 5**************************************************************
***********************************************************************************************
*Table 5: RD specifications (treatment, running_var and their interaction) within 1.75 miles.
*Columns (1)-(3) are followed by a wild cluster bootstrap test of treatment.
*Math
*Column (1): school controls only
reg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_math==1, cluster(locationcode) 
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
*Column (2): adds demographic and mobility controls, absorbs stdzipcode
areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_math==1, cluster(locationcode) absorb(stdzipcode)
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
*Column (3): adds lat/lon polynomial and bound_seg2 indicators
areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move `lat_lon_controls' i.bound_seg2 if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_math==1, cluster(locationcode) absorb(stdzipcode)
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
*column (4): excludes locationcode 2117
areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_math==1 & locationcode!=2117, cluster(locationcode) absorb(stdzipcode)
*Column (5): grades 4-8
areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=8 & grade>=4 & valid_math==1, cluster(locationcode) absorb(stdzipcode)
*English
*These regressions condition on valid_ela==1, where lag_ela is never imputed, so ela_missing is
*constant 0 and carries no information. The lag that can be zero-filled here is lag_math, so the
*relevant flag is math_missing, as in the Table 4 English regressions. The earlier i.ela_missing
*was a carry-over from the math block.
reg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.math_missing i.grade  `school_controls' if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_ela==1, cluster(locationcode) 
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.math_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_ela==1, cluster(locationcode) absorb(stdzipcode)
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.math_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move `lat_lon_controls' i.bound_seg2 if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_ela==1, cluster(locationcode) absorb(stdzipcode)
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
*column (4): excludes locationcode 2117
areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.math_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & grade>=4 & valid_ela==1 & locationcode!=2117, cluster(locationcode) absorb(stdzipcode)
*Column (5): grades 4-8
areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.math_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=8 & grade>=4 & valid_ela==1, cluster(locationcode) absorb(stdzipcode)
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

***********************************************************************************************
**************************Table B.3************************************************************
***********************************************************************************************
*Table B.3 robustness checks (math). Columns (1) and (3) repeat regressions already run above,
*so only the remaining columns are estimated here.
*For English, swap math_scorez -> ela_scorez and valid_math -> valid_ela.
*boottest is not repeated here; run it after each regression to obtain the bootstrap inference.

*Quadratic terms in the running variable (own and treated-side)
generate running_var2 = running_var*running_var
generate inter2 = running_var*running_var*treatment
*Triangular kernel weight for a 1.75-mile bandwidth
generate tri_weight = max(0, 1.75-abs(running_var))

*Pieces shared by all three regressions
local b3_common "i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move"
local b3_if "inrange(running_var,-1.75,1.75) & year==2016 & inrange(grade,4,5) & valid_math==1"

*Column (2) - percent living within 5 miles added as a control
areg math_scorez treatment running_var inter pct_living_5 `b3_common' if `b3_if', cluster(locationcode) absorb(stdzipcode)

*Quadratic in the running variable
areg math_scorez treatment running_var inter running_var2 inter2 `b3_common' if `b3_if', cluster(locationcode) absorb(stdzipcode)

*Triangular kernel weights
areg math_scorez treatment running_var inter `b3_common' [aw=tri_weight] if `b3_if', cluster(locationcode) absorb(stdzipcode)
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************


***********************************************************************************************
**************************Figure B.5***********************************************************
***********************************************************************************************
*Figure B.5: triangular-kernel RD estimate of treatment as a function of bandwidth (0.75 to 2.5
*miles in steps of 0.25), with 90% wild cluster bootstrap confidence intervals.
preserve
*tri_weight may already exist (it is created for Table B.3), in which case the "gen" inside the
*loop would stop with "variable tri_weight already defined". Remove it first; restore at the end
*of this section brings the original variable back.
capture drop tri_weight
*A collects one row per bandwidth: estimate, CI lower, CI upper, bandwidth (first row is a missing placeholder)
mat A=[.,.,.,.]
foreach b of numlist 0.75(0.25)2.5{
    di "bandwidth is `b'"
    qui gen tri_weight=max(0,`b'-abs(running_var))
    qui areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move [aw=tri_weight] if running_var>=-`b' & running_var<=`b' & year==2016 & grade<=5 & grade>=4 & valid_math==1, cluster(locationcode) absorb(stdzipcode)
    *Uncomment this line (and comment prior line) for english
    *qui areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move [aw=tri_weight] if running_var>=-`b' & running_var<=`b' & year==2016 & grade<=5 & grade>=4 & valid_ela==1, cluster(locationcode) absorb(stdzipcode)
    *treatment is the first regressor, so D[1,1] is its coefficient
    mat D=e(b)
    boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph level(90)
    mat C=r(CI)

    mat A=A\[D[1,1], C[1,1], C[1,2], `b']
    drop tri_weight
}

*Turn the result matrix into variables A1-A4 and give them readable names
svmat A
rename A1 est
rename A2 lcl
rename A3 ucl
rename A4 bandwidth

*Generate a graph of the estimates as a function of bandwidth*
graph twoway (connected est bandwidth) (rcap ucl lcl bandwidth), legend(off) xtitle("Bandwidth") ytitle("Estimated Treatment Effect ({&sigma})") xline(1.75) yline(0, lcolor(black) ) graphregion(color(white)) xlabel(0.75(0.25)2.5)
drop est bandwidth lcl ucl
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

***********************************************************************************************
**************************Figure B.4***********************************************************
***********************************************************************************************
*Figure B.4: the main RD specification estimated separately by year, with wild cluster
*bootstrap confidence intervals.
preserve
*Treat a missing below5 as 0 (variable name spelled out so this does not rely on abbreviation)
replace below5=0 if below5==.
***FIGURE: Main Estimates by year***
*A collects one row per year: estimate, CI lower, CI upper, year (first row is a missing placeholder)
mat A=[.,.,.,.]
*2017 is not in the list: observations with year>=2017 are dropped in the sample restrictions
*above, so a 2017 regression would stop the script with "no observations". The graph below
*never used a 2017 estimate.
foreach year of numlist 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2016{
    display "Year is `year'"
    qui areg math_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==`year' & grade<=5 & grade>=4 & valid_math==1, cluster(locationcode) absorb(stdzipcode)
    *Uncomment this line (and comment prior line) for english
    *qui areg ela_scorez treatment running_var inter i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing i.grade  `school_controls' `demo_controls' i.moved_school dist_move i.within_year_move if running_var>=-1.75 & running_var<=1.75 & year==`year' & grade<=5 & grade>=4 & valid_ela==1, cluster(locationcode) absorb(stdzipcode)
    *treatment is the first regressor, so D[1,1] is its coefficient
    mat D=e(b)
    boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
    mat C=r(CI)

    mat A=A\[D[1,1], C[1,1], C[1,2], `year']
}
svmat A
rename A1 est
rename A2 lcl
rename A3 ucl
rename A4 year_filt

*Bar chart of the yearly estimates with their confidence intervals; only 2007-2013 and 2016 are drawn
*NOTE(review): xline(10) lies far outside a 2007-2016 axis and looks like a leftover - confirm intended position
twoway (bar est year_filt if year_filt>2006 & year_filt<=2013) (rcap ucl lcl year_filt if year_filt>2006 & year_filt<=2013) (bar est year_filt if year_filt==2016 & year_filt==2016) (rcap ucl lcl year_filt if year_filt==2016 & year_filt==2016), legend(off) xtitle("School Year") ytitle("Estimated Treatment Effect ({&sigma})") xline(10) yline(0, lcolor(black) ) graphregion(color(white)) xlabel(2007 "06-07" 2008 "07-08" 2009 "08-09" 2010 "09-10" 2011 "10-11" 2012 "11-12" 2013 "12-13" 2016 "15-16")
drop est year_filt lcl ucl
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************


***********************************************************************************************
**************************Figure B.3 and Table 6************************************************
***********************************************************************************************
*Figure B.3 (covariate smoothness plots) and Table 6 (covariate balance regressions), 2016 only.
*Indicators are built so that missing/unknown categories stay missing rather than being coded 0.
preserve
keep if year==2016
**RACE (ethnicity code 10 marks missing ethnicity)
gen hispanic=1 if ethnicity==5
replace hispanic=0 if ethnicity!=5 & ethnicity!=10
gen white=1 if ethnicity==8
replace white=0 if ethnicity!=8 & ethnicity!=10
gen asia=1 if ethnicity==2
replace asia=0 if ethnicity!=2 & ethnicity!=10
gen black=1 if ethnicity==3
replace black=0 if ethnicity!=3 & ethnicity!=10
*FRL
gen fl=1 if frl==3
replace fl=0 if frl==1
*College Grad
*NOTE(review): pared==6 is grouped with college here but with HS in the Table 3 section - confirm coding
gen college=1 if pared==1 | pared==3 | pared==6
replace college=0 if pared==4| pared==5 
*HS DROP
gen hs_drop=1 if pared==5
replace hs_drop=0 if pared==1 | pared==3| pared==4 | pared==6
*HS Grad
gen hs_grad=1 if pared==4
replace hs_grad=0 if pared==1 | pared==3| pared==5 | pared==6
*EL
gen el=1 if el_code!=.
replace el=0 if el_code==7 & el_code!=.

*Figure B.3
*Shift the running variable by 5 for plotting only; it is shifted back after the plots.
*NOTE(review): if running_var is stored as float, +5 followed by -5 may not reproduce the
*original values exactly - consider plotting a separate shifted copy instead
replace running_var=running_var+5
cmogram  lag_math running_var if  running_var<=6.75 & running_var>=3.25 & grade<=5 & grade>=4 & valid_math==1, histopts(width(0.05)) cutpoint(4.99999) scatter lfit graphopts(ytitle("Lagged Mathematics Score") xline(5)) graphopts(xtitle("Distance from Gas Leak (miles)")) graphopts(xlabel(3.25(0.5)6.75)) graphopts(ylabel(#10)) lfitopts(lcolor(green) alcolor(gs10) alp(dash)) lfitopts(lcolor(maroon) alcolor(gs10) alp(dash) est(vce (cluster locationcode)))
cmogram  lag_ela running_var if  running_var<=6.75 & running_var>=3.25 & grade<=5 & grade>=4 & valid_math==1, histopts(width(0.05)) cutpoint(4.99999) scatter lfit graphopts(ytitle("Lagged English Score") xline(5)) graphopts(xtitle("Distance from Gas Leak (miles)")) graphopts(xlabel(3.25(0.5)6.75)) graphopts(ylabel(#10)) lfitopts(lcolor(green) alcolor(gs10) alp(dash)) lfitopts(lcolor(maroon) alcolor(gs10) alp(dash) est(vce (cluster locationcode)))

*One y-axis title per plotted indicator (previously every panel was titled "Percent Hispanic")
local ytitle_hispanic "Percent Hispanic"
local ytitle_white "Percent White"
local ytitle_asia "Percent Asian"
local ytitle_el "Percent EL"
local ytitle_fl "Percent FRL"
local ytitle_hs_drop "Percent Parent HS Dropout"
local ytitle_college "Percent Parent College Graduate"
*The FRL panel plots the 0/1 indicator fl; the earlier list named the raw categorical code frl,
*so that panel showed category codes times 100 rather than a percentage.
foreach var of varlist hispanic white asia el fl hs_drop college{
    replace `var'=`var'*100
    cmogram  `var' running_var if  running_var<=6.75 & running_var>=3.25 & grade<=5 & grade>=4 & valid_math==1, histopts(width(0.05)) cutpoint(4.99999) scatter lfit graphopts(ytitle("`ytitle_`var''") xline(5)) graphopts(xtitle("Distance from Gas Leak (miles)")) graphopts(xlabel(3.25(0.5)6.75)) graphopts(ylabel(#10)) lfitopts(lcolor(green) alcolor(gs10) alp(dash)) lfitopts(lcolor(maroon) alcolor(gs10) alp(dash) est(vce (cluster locationcode)))
    *fl was never rescaled before this fix; put it back on its 0/1 scale so the Table 6
    *regressions below see exactly the same fl as before
    if "`var'"=="fl" {
        replace fl=fl/100
    }
}
replace running_var=running_var-5

*Table 6
*Balance regression, bootstrap test and sample mean for each covariate.
*Note: hispanic, white, asia, el, hs_drop and college are on a 0-100 scale here (rescaled in the
*plot loop above); black, fl and hs_grad are 0/1.
foreach var of varlist hispanic white asia black fl el college hs_grad hs_drop lag_absent lag_math lag_ela {
    di "var is `var'"
    reg `var' treatment running_var inter i.grade i.bound_seg2 latitude longitude if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & valid_math==1, cluster(locationcode)
    boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
    su `var' if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5 & valid_math==1
}
*Test taking rate
*took_test is defined for grades 4-5 within the 1.75-mile window: 1 if the math score is valid, else 0
gen took_test=0 if running_var>=-1.75 & running_var<=1.75 & grade<=5 & grade>=4
replace took_test=1 if running_var>=-1.75 & running_var<=1.75 & grade<=5 & valid_math==1 & took_test==0
reg took_test treatment running_var inter i.grade i.bound_seg2 latitude longitude if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5, cluster(locationcode)
boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
su took_test if running_var>=-1.75 & running_var<=1.75 & year==2016 & grade<=5
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************




***********************************************************************************************
******************************Table 7**********************************************************
***********************************************************************************************
* Post-period indicator (year 2016 onward) and its interactions with the RD terms.
generate post=(year>=2016)
generate post_treat=post*treatment
generate post_run=post*running_var
generate post_inter=post*inter

* Math, linear in the running variable. For English replace "math_scorez" with "ela_scorez"
* and "valid_math" with "valid_ela".
* The boottest line appears only after the first regression; repeat it after each regression
* to obtain the wild-bootstrap p-values for post_treat.
local t7_rd     "post_treat post_run post_inter treatment running_var inter"
local t7_lags   "i.grade#c.lag_math  i.grade#c.lag_ela i.ela_missing"
local t7_moves  "i.moved_school dist_move i.within_year_move"
local t7_window "running_var>=-1.75 & running_var<=1.75 & year>=2011"

* First two specifications (presumably Columns (1) and (2), given the labels that follow).
* Lagged scores by grade and school controls:
regress math_scorez `t7_rd' `t7_lags' i.grade##i.year  `school_controls'  ///
    if `t7_window' & grade<=5 & valid_math==1, cluster(locationcode)
boottest post_treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph
* Adds demographic and mobility controls and absorbs stdzipcode:
areg math_scorez `t7_rd' `t7_lags' i.grade##i.year  `school_controls' `demo_controls' `t7_moves' ///
    if `t7_window' & grade<=5 & valid_math==1, cluster(locationcode) absorb(stdzipcode)

* The remaining columns absorb stdpseudoid and enter stdzipcode as indicators; they drop the
* lagged-score controls and the valid_math restriction.
local t7_fe_rhs "`t7_rd' i.grade##i.year  `school_controls' `t7_moves' i.stdzipcode"

*Column (3)
areg math_scorez `t7_fe_rhs' ///
    if `t7_window' & grade<=5, cluster(locationcode) absorb(stdpseudoid)
*Column (4): same as Column (3), excluding locationcode 2117
areg math_scorez `t7_fe_rhs' ///
    if `t7_window' & grade<=5 & locationcode!=2117, cluster(locationcode) absorb(stdpseudoid)
*Column (5): same as Column (3), extended to grades up to 8
areg math_scorez `t7_fe_rhs' ///
    if `t7_window' & grade<=8, cluster(locationcode) absorb(stdpseudoid)
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

***********************************************************************************************
**************************Table B.4************************************************************
***********************************************************************************************
*Note that columns (1) and (3) are identical to other previously run regressions so not recreating them here
*Quadratic
* Squared running variable and its interactions with treatment and the post indicator.
generate running_var2=running_var*running_var
generate inter2=running_var*running_var*treatment
generate post_run2=post*running_var2
generate post_inter2=post*inter2
*Triangular Kernel
* Weight falls linearly from 1.75 at the cutoff to 0 at a distance of 1.75 or more.
generate tri_weight=max(0,1.75-abs(running_var))

* Pieces shared by the three specifications below: grade-by-year effects, school controls,
* mobility controls and zip-code indicators, on the 2011+ sample within 1.75 of the cutoff.
local b4_controls "i.grade##i.year  `school_controls' i.moved_school dist_move i.within_year_move i.stdzipcode"
local b4_sample   "running_var>=-1.75 & running_var<=1.75 & year>=2011 & grade<=5"

*Column (2) - percent living within 5 miles
areg math_scorez post_treat treatment i.post##c.pct_living_5 `b4_controls' ///
    if `b4_sample', cluster(locationcode) absorb(stdpseudoid)
*Math: Quadratic (only have boottest command once, but always run after reg for SEs). For English just change "math_scorez" to "ela_scorez" and "valid_math" to "valid_ela"
areg math_scorez post_treat post_run post_inter treatment running_var running_var2 post_run2 inter inter2 post_inter2 pct_living_5 `b4_controls' ///
    if `b4_sample', cluster(locationcode) absorb(stdpseudoid)
*Triangular
areg math_scorez post_treat post_run post_inter treatment running_var inter pct_living_5 `b4_controls' [aw=tri_weight] ///
    if `b4_sample', cluster(locationcode) absorb(stdpseudoid)
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

***********************************************************************************************
******************************Figure B.6*******************************************************
***********************************************************************************************
preserve
***FIGURE: Bandwidth Robustness***
* Re-estimates the triangular-kernel specification for bandwidths 0.75 to 2.5 (step 0.25) and
* plots the post_treat coefficient with its wild-bootstrap confidence interval by bandwidth.

* tri_weight already exists in memory (created for the Table B.4 triangular-kernel column), so
* the gen inside the loop would stop with r(110) "variable already defined". Drop it here; the
* restore at the end of this block brings the original variable back.
capture drop tri_weight

* A accumulates one row per bandwidth: estimate, CI lower bound, CI upper bound, bandwidth.
* The initial all-missing row is only a placeholder to append to; it is ignored when plotting.
mat A=[.,.,.,.]
foreach b of numlist 0.75(0.25)2.5{
    di "bandwidth is `b'"
    * Triangular weights for the current bandwidth.
    qui gen tri_weight=max(0,`b'-abs(running_var))
    qui areg math_scorez post_treat post_run post_inter treatment running_var inter i.grade##i.year `school_controls' i.moved_school dist_move i.within_year_move i.stdzipcode i.locationcode [aw=tri_weight]  if running_var>=-`b' & running_var<=`b' & year>=2011 & grade<=5, cluster(locationcode) absorb(stdpseudoid)
    *Uncomment this line (and comment prior line) for english
    *qui areg ela_scorez post_treat post_run post_inter treatment running_var inter i.grade##i.year `school_controls' i.moved_school dist_move i.within_year_move i.stdzipcode i.locationcode [aw=tri_weight]  if running_var>=-`b' & running_var<=`b' & year>=2011 & grade<=5, cluster(locationcode) absorb(stdpseudoid)
    * D[1,1] is the post_treat coefficient because post_treat is the first regressor listed;
    * keep it first if the specification is edited.
    mat D=e(b)
    *boottest treatment, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph level(90)
    boottest post_treat, boottype(wild) reps(999) cluster(locationcode) seed(1234) nograph 
    * Confidence-interval bounds returned by boottest.
    mat C=r(CI)

    mat A=A\[D[1,1], C[1,1], C[1,2], `b']
    drop tri_weight
}

* Turn the results matrix into variables A1-A4 and give them readable names.
svmat A
rename A1 est
rename A2 lcl
rename A3 ucl
rename A4 bandwidth

*Generate a graph of the estimates as a function of bandwidth*
graph twoway (connected est bandwidth) (rcap ucl lcl bandwidth), legend(off) xtitle("Bandwidth") ytitle("Estimated Treatment Effect ({&sigma})") xline(1.75) yline(0, lcolor(black) ) graphregion(color(white)) xlabel(0.75(0.25)2.5)
* No explicit drop of the plotting variables is needed: restore discards them.
restore
***********************************************************************************************
***********************************************************************************************
***********************************************************************************************

























